# The version of R used is 3.6.1.
# Read the listing detail CSV file as UTF-8, keeping text columns as
# character vectors.
# NOTE(review): the original comment called this file "listings_detail.csv"
# while the code reads "listing_detail.csv" -- confirm which name is correct.
df <- read.csv("listing_detail.csv", encoding = "UTF-8",
               stringsAsFactors = FALSE)
# Keep only rows whose three t/f flag columns hold "t", "f" or an empty
# string. Any other value presumably means the row's fields ended up in the
# wrong columns when the file was parsed. Rows with NA in a flag are dropped
# too, exactly as subset() did with the previous `==` comparisons.
valid_flags <- c("t", "f", "")
has_valid_flags <- df$instant_bookable %in% valid_flags &
  df$host_has_profile_pic %in% valid_flags &
  df$has_availability %in% valid_flags
df2 <- df[has_valid_flags, , drop = FALSE]
dim(df2)
## [1] 25330 74
# Coerce the review count to numeric so it can be compared with a number
review_count <- as.numeric(df2$number_of_reviews)
df2$number_of_reviews <- review_count
# Keep the listings with at least one review; rows whose count is NA are
# dropped as well
has_reviews <- !is.na(review_count) & review_count > 0
df3 <- df2[has_reviews, , drop = FALSE]
dim(df3)
## [1] 11600 74
# Delete columns with too many missing values.
# For every column of df3, compute the share of entries that are NA, an empty
# string or the literal "N/A". The columns are taken from df3 itself instead
# of a hard-coded count of 74, so the step keeps working when the input
# gains or loses columns. unname() keeps `re` a plain unnamed vector, as the
# loop-based version produced.
re <- unname(vapply(
  df3,
  function(col) mean(is.na(col) | col == "" | col == "N/A"),
  numeric(1)
))
re
## [1] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0002586207
## [6] 0.0164655172 0.3248275862 0.0000000000 0.0000000000 0.0000000000
## [11] 0.0000862069 0.0000862069 0.0011206897 0.4183620690 0.5970689655
## [16] 0.5970689655 0.4132758621 0.0000862069 0.0000862069 0.0000862069
## [21] 0.3402586207 0.0000862069 0.0000862069 0.0000000000 0.0000862069
## [26] 0.0000862069 0.3246551724 0.0000000000 1.0000000000 0.0000000000
## [31] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 1.0000000000
## [36] 0.0006896552 0.0641379310 0.0054310345 0.0000000000 0.0000000000
## [41] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## [46] 0.0000000000 0.0000000000 0.0000000000 1.0000000000 0.0000000000
## [51] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## [56] 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000
## [61] 0.0475862069 0.0507758621 0.0493965517 0.0512931034 0.0497413793
## [66] 0.0513793103 0.0515517241 1.0000000000 0.0000000000 0.0000000000
## [71] 0.0000000000 0.0000000000 0.0000000000 0.0000000000
# Drop every column whose missing share exceeds 10%.
# A logical mask is used instead of -which(...): when no column crosses the
# threshold, which() returns integer(0) and df3[, -integer(0)] selects zero
# columns instead of keeping all of them. drop = FALSE keeps the result a
# data frame even if a single column survives.
too_sparse <- !is.na(re) & re > 0.1
df4 <- df3[, !too_sparse, drop = FALSE]
# Output deleted column names
names(df3)[too_sparse]
## [1] "neighborhood_overview" "host_about"
## [3] "host_response_time" "host_response_rate"
## [5] "host_acceptance_rate" "host_neighbourhood"
## [7] "neighbourhood" "neighbourhood_group_cleansed"
## [9] "bathrooms" "calendar_updated"
## [11] "license"
# Delete columns with only one level.
# Count the distinct values in each column of df4. The columns are taken from
# df4 itself instead of a hard-coded count of 63. The result is an unnamed
# double vector, as the loop-based version produced.
re <- unname(vapply(df4, function(col) length(unique(col)), numeric(1)))
re
## [1] 11600 11600 1 1 11456 11279 11532 9521 9521 3727 2873 835
## [13] 3 9496 9496 61 61 278 3 3 38 8069 8679 62
## [25] 4 16 30 11 19 11041 647 57 156 56 62 146
## [37] 147 184 203 1 31 61 91 365 1 282 78 16
## [49] 2339 1828 46 10 10 10 10 9 10 2 51 47
## [61] 19 10 537
# Drop the columns that hold at most one distinct value.
# A logical mask replaces -which(...): if no column qualified, which() would
# return integer(0) and df4[, -integer(0)] would select zero columns instead
# of keeping them all. drop = FALSE keeps the result a data frame.
single_valued <- !is.na(re) & re <= 1
df5 <- df4[, !single_valued, drop = FALSE]
# Output deleted column names
names(df4)[single_valued]
## [1] "scrape_id" "last_scraped" "has_availability"
## [4] "calendar_last_scraped"
# Columns treated as meaningless for the analysis
del <- c(
  "listing_url", "picture_url", "host_id", "host_url", "host_name",
  "host_since", "host_thumbnail_url", "host_picture_url", "first_review",
  "last_review"
)
df6 <- df5[, is.na(match(names(df5), del))]
# Text columns that go through text analysis:
#   amenities, property_type, host_verifications, name, description,
#   host_location
# (presumably these sit at the column positions listed in text_cols --
# confirm against names(df6))
text_cols <- c(2, 3, 4, 8, 14, 20)
# Merge the text columns into one space-separated string per row, starting
# from an empty string for every row
a <- Reduce(
  function(merged, idx) paste(merged, df6[, idx], sep = " "),
  text_cols,
  character(nrow(df6))
)
#Text information processing
library(jiebaRD)
## Warning: package 'jiebaRD' was built under R version 3.6.3
library(jiebaR)
## Warning: package 'jiebaR' was built under R version 3.6.3
library(NLP)
## Warning: package 'NLP' was built under R version 3.6.3
library(tm)
## Warning: package 'tm' was built under R version 3.6.3
# Split vocabulary: segment the merged text of every row into words.
# The worker option is spelled out in full as `bylines = TRUE`; the previous
# `byline = T` only worked through partial argument matching and used the
# reassignable shorthand T.
keyword <- segment(a, worker(bylines = TRUE))
# Generate thesaurus: one corpus document per segmented row
Thesaurus <- VCorpus(VectorSource(keyword))
# Generate document term frequency matrix
keywordmatrix <- DocumentTermMatrix(Thesaurus)
# Remove sparse terms (sparsity threshold 0.95)
keywordmatrix2 <- removeSparseTerms(keywordmatrix, sparse = 0.95)
# Convert to a plain matrix. The name keywordmarix3 (sic) is kept because the
# later cbind() with the feature table refers to it.
keywordmarix3 <- as.matrix(keywordmatrix2)
dim(keywordmarix3)
## [1] 11600 390
# Remove the text columns (already merged for text analysis) from the
# feature table, keeping every other column in its original order
text_cols <- c(2, 3, 4, 8, 14, 20)
df7 <- df6[, setdiff(seq_len(ncol(df6)), text_cols)]
# bathrooms_text needs special processing; list its distinct values first
unique(df7[["bathrooms_text"]])
## [1] "1 shared bath" "3 baths" "1 private bath" "1 bath"
## [5] "2.5 baths" "1.5 shared baths" "2.5 shared baths" "2 baths"
## [9] "0 shared baths" "1.5 baths" "" "0 baths"
## [13] "2 shared baths" "5 baths" "Half-bath" "4 baths"
## [17] "3.5 baths" "4.5 baths" "6 baths" "Shared half-bath"
## [21] "5.5 baths" "3 shared baths" "3.5 shared baths" "8 shared baths"
## [25] "6.5 baths" "4 shared baths" "4.5 shared baths" "6 shared baths"
## [29] "18 baths" "18 shared baths"
library(stringr)
## Warning: package 'stringr' was built under R version 3.6.3
# Numeric bathroom count: the first number found in bathrooms_text. The
# pattern accepts any number of digits with an optional decimal part; the
# previous pattern stopped after one digit on each side of the point, so a
# value such as "100" would have been read as 10.
# NOTE(review): "Half-bath" and "Shared half-bath" contain no digits, so
# bathsum is NA for them and those rows disappear in the later na.omit().
# Confirm whether they should be recorded as 0.5 instead.
df7$bathsum <- as.numeric(str_extract(df7$bathrooms_text, "\\d+(?:\\.\\d+)?"))
# 1 when the bathroom is shared, 0 otherwise. Matched case-insensitively so
# that "Shared half-bath" (capital S) is flagged as shared too.
df7$bathclass <- ifelse(str_detect(df7$bathrooms_text, "(?i)share"), 1, 0)
# Strip the dollar sign and thousands separators before converting the price
df7$price <- as.numeric(str_replace_all(df7$price, "\\$|,", ""))
# Delete the bathrooms_text column, selected by name rather than by position
df8 <- df7[, names(df7) != "bathrooms_text"]
# Change the order of the columns so that the factor-type columns (2 to 7)
# come before the numeric ones; the column previously at position 27 is
# moved to the end
df8 <- df8[, c(1, 2, 5:7, 37, 10, 3:4, 8:9, 11:26, 28:36, 38:44, 27)]
# Turn to factor variable
factor_cols <- 2:7
df8[factor_cols] <- lapply(df8[factor_cols], as.factor)
# Turn to numeric variable
numeric_cols <- 8:41
df8[numeric_cols] <- lapply(df8[numeric_cols], as.numeric)
# Merge data set and word frequency matrix
df9 <- cbind(df8, keywordmarix3)
keywordmarix3[1:100, 1:6]
## Terms
## Docs access across after aid air airport
## 1 0 0 0 0 0 0
## 2 0 0 0 1 1 0
## 3 0 0 1 0 1 0
## 4 0 0 0 1 1 0
## 5 1 0 0 1 1 0
## 6 1 0 1 0 0 0
## 7 0 0 0 0 0 0
## 8 0 0 0 0 0 0
## 9 0 0 0 0 0 0
## 10 1 0 0 1 0 0
## 11 1 0 0 0 0 0
## 12 1 0 0 0 0 0
## 13 1 0 0 1 1 1
## 14 0 0 0 0 1 2
## 15 1 0 0 1 0 0
## 16 0 0 0 0 0 0
## 17 2 0 0 0 0 0
## 18 0 0 0 1 1 0
## 19 2 1 0 0 1 0
## 20 0 0 0 1 0 0
## 21 0 0 0 0 1 0
## 22 0 0 0 0 0 0
## 23 0 0 0 0 1 0
## 24 0 0 0 0 1 0
## 25 0 1 0 0 0 0
## 26 0 0 0 0 0 0
## 27 0 0 0 0 0 0
## 28 0 0 0 0 2 0
## 29 2 0 0 1 1 0
## 30 1 0 0 1 0 0
## 31 0 0 1 1 0 0
## 32 2 0 0 0 0 0
## 33 0 0 0 1 2 0
## 34 0 0 0 0 0 0
## 35 0 0 0 0 1 0
## 36 0 0 2 0 0 0
## 37 0 0 0 0 1 0
## 38 0 0 0 0 0 0
## 39 0 0 0 0 0 0
## 40 0 0 0 1 0 0
## 41 1 0 0 0 2 0
## 42 0 0 0 0 0 0
## 43 1 0 1 0 0 0
## 44 0 0 0 1 0 0
## 45 0 0 0 0 0 0
## 46 0 0 0 1 1 0
## 47 0 0 0 1 0 0
## 48 0 0 0 1 2 0
## 49 0 0 0 0 2 0
## 50 0 0 0 1 0 0
## 51 0 0 0 0 0 0
## 52 0 1 0 0 0 0
## 53 0 0 0 0 1 0
## 54 0 0 0 0 2 0
## 55 0 0 0 1 0 1
## 56 0 0 0 0 2 0
## 57 0 0 0 1 1 0
## 58 0 0 0 0 3 0
## 59 0 0 0 0 0 0
## 60 0 0 0 0 0 0
## 61 0 0 0 0 0 0
## 62 0 0 0 1 0 0
## 63 0 0 0 1 1 0
## 64 0 0 0 0 1 0
## 65 1 0 0 0 0 0
## 66 0 0 0 0 2 0
## 67 0 0 0 1 0 1
## 68 0 0 0 1 0 0
## 69 0 0 0 0 2 0
## 70 1 0 0 0 0 0
## 71 0 0 0 1 0 0
## 72 0 0 0 0 0 0
## 73 0 0 0 0 1 0
## 74 0 0 0 0 0 0
## 75 0 0 0 1 0 0
## 76 1 0 1 0 0 0
## 77 1 0 0 1 0 0
## 78 0 0 0 0 0 0
## 79 0 0 1 0 1 0
## 80 0 0 0 0 0 0
## 81 0 0 0 0 0 0
## 82 0 0 0 0 0 0
## 83 1 0 0 0 1 0
## 84 0 0 0 0 1 0
## 85 0 0 0 0 2 0
## 86 0 0 0 0 0 0
## 87 0 0 0 0 0 0
## 88 0 0 0 0 0 0
## 89 0 0 1 0 1 0
## 90 2 0 0 0 0 0
## 91 0 0 1 0 1 0
## 92 0 0 1 0 1 0
## 93 2 0 0 1 1 1
## 94 0 1 0 0 1 1
## 95 0 0 0 0 0 0
## 96 0 0 0 0 0 0
## 97 2 0 0 0 1 0
## 98 0 0 0 1 2 0
## 99 0 0 0 0 1 0
## 100 0 0 0 0 1 0
# Drop every row that contains a missing value in any column
df10 <- stats::na.omit(df9)
c(nrow(df10), ncol(df10))
## [1] 10220 434
# Sort by review count, most reviewed first
df10 <- df10[order(-df10$number_of_reviews), ]
# Replace the review count by a 0/1 label: 1 for the 100 most reviewed
# listings, 0 for the rest. The group sizes are derived from the actual row
# count instead of the hard-coded 100 / 10120 split, which only fits a data
# set of exactly 10220 rows.
n_top <- min(100, nrow(df10))
df10$number_of_reviews <- rep(c(1, 0), c(n_top, nrow(df10) - n_top))
# Rename the label column by name rather than by the fixed position 44
names(df10)[match("number_of_reviews", names(df10))] <- "istop100"
# Shuffle the rows.
# NOTE(review): no seed is set in the visible code, so the row order differs
# between runs -- consider set.seed() if reproducibility matters.
df10 <- df10[sample(nrow(df10), nrow(df10)), ]
write.csv(df10, "cleaned_data.csv", row.names = FALSE)
# Read cleaned_data.csv file. stringsAsFactors is set explicitly so the t/f
# and category columns come back as factors on every R version; this was the
# default in R 3.6.1 but is no longer the default from R 4.0.0 on.
df <- read.csv("cleaned_data.csv", stringsAsFactors = TRUE)
dim(df)
## [1] 10220 434
# Print per-column summary statistics of the cleaned data set read back from
# cleaned_data.csv
summary(df)
## id host_is_superhost host_has_profile_pic
## Min. : 11156 f:8429 f: 18
## 1st Qu.: 9686147 t:1791 t:10202
## Median :21470862
## Mean :21386240
## 3rd Qu.:31805768
## Max. :46128796
##
## host_identity_verified neighbourhood_cleansed instant_bookable
## f:2053 Sydney :2540 f:6341
## t:8167 Waverley :1464 t:3879
## Randwick : 876
## Warringah: 547
## Manly : 507
## Woollahra: 440
## (Other) :3846
## room_type host_listings_count host_total_listings_count
## Entire home/apt:6658 Min. : 0.000 Min. : 0.000
## Hotel room : 65 1st Qu.: 1.000 1st Qu.: 1.000
## Private room :3384 Median : 1.000 Median : 1.000
## Shared room : 113 Mean : 6.201 Mean : 6.201
## 3rd Qu.: 2.000 3rd Qu.: 2.000
## Max. :225.000 Max. :225.000
##
## latitude longitude accommodates bedrooms
## Min. :-34.10 Min. :150.7 Min. : 1.000 Min. : 1.000
## 1st Qu.:-33.90 1st Qu.:151.2 1st Qu.: 2.000 1st Qu.: 1.000
## Median :-33.88 Median :151.2 Median : 2.000 Median : 1.000
## Mean :-33.86 Mean :151.2 Mean : 3.482 Mean : 1.688
## 3rd Qu.:-33.83 3rd Qu.:151.3 3rd Qu.: 4.000 3rd Qu.: 2.000
## Max. :-33.40 Max. :151.3 Max. :16.000 Max. :20.000
##
## beds price minimum_nights maximum_nights
## Min. : 0.000 Min. : 11.0 Min. : 1.000 Min. : 1.0
## 1st Qu.: 1.000 1st Qu.: 78.0 1st Qu.: 1.000 1st Qu.: 30.0
## Median : 1.000 Median : 126.0 Median : 2.000 Median :1125.0
## Mean : 2.024 Mean : 199.8 Mean : 5.307 Mean : 667.4
## 3rd Qu.: 3.000 3rd Qu.: 215.2 3rd Qu.: 5.000 3rd Qu.:1125.0
## Max. :19.000 Max. :28613.0 Max. :500.000 Max. :1825.0
##
## minimum_minimum_nights maximum_minimum_nights minimum_maximum_nights
## Min. : 1.000 Min. : 1.000 Min. :1.000e+00
## 1st Qu.: 1.000 1st Qu.: 2.000 1st Qu.:4.500e+01
## Median : 2.000 Median : 3.000 Median :1.125e+03
## Mean : 4.986 Mean : 5.831 Mean :8.413e+05
## 3rd Qu.: 4.000 3rd Qu.: 5.000 3rd Qu.:1.125e+03
## Max. :500.000 Max. :500.000 Max. :2.147e+09
##
## maximum_maximum_nights minimum_nights_avg_ntm maximum_nights_avg_ntm
## Min. :1.000e+00 Min. : 1.000 Min. :1.000e+00
## 1st Qu.:4.700e+01 1st Qu.: 1.975 1st Qu.:4.500e+01
## Median :1.125e+03 Median : 2.100 Median :1.125e+03
## Mean :8.413e+05 Mean : 5.389 Mean :8.413e+05
## 3rd Qu.:1.125e+03 3rd Qu.: 5.000 3rd Qu.:1.125e+03
## Max. :2.147e+09 Max. :500.000 Max. :2.147e+09
##
## availability_30 availability_60 availability_90 availability_365
## Min. : 0.000 Min. : 0.00 Min. : 0.00 Min. : 0.0
## 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.0
## Median : 0.000 Median : 0.00 Median : 0.00 Median : 0.0
## Mean : 7.434 Mean :15.92 Mean :25.95 Mean : 94.3
## 3rd Qu.:17.000 3rd Qu.:35.00 3rd Qu.:59.00 3rd Qu.:179.0
## Max. :30.000 Max. :60.00 Max. :90.00 Max. :365.0
##
## number_of_reviews_ltm number_of_reviews_l30d review_scores_rating
## Min. : 0.000 Min. : 0.0000 Min. : 20.00
## 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.: 91.00
## Median : 1.000 Median : 0.0000 Median : 97.00
## Mean : 4.071 Mean : 0.1954 Mean : 93.51
## 3rd Qu.: 4.000 3rd Qu.: 0.0000 3rd Qu.:100.00
## Max. :147.000 Max. :29.0000 Max. :100.00
##
## review_scores_accuracy review_scores_cleanliness review_scores_checkin
## Min. : 2.000 Min. : 2.000 Min. : 2.000
## 1st Qu.: 9.000 1st Qu.: 9.000 1st Qu.:10.000
## Median :10.000 Median :10.000 Median :10.000
## Mean : 9.551 Mean : 9.219 Mean : 9.729
## 3rd Qu.:10.000 3rd Qu.:10.000 3rd Qu.:10.000
## Max. :10.000 Max. :10.000 Max. :10.000
##
## review_scores_communication review_scores_location review_scores_value
## Min. : 2.000 Min. : 2.000 Min. : 2.00
## 1st Qu.:10.000 1st Qu.:10.000 1st Qu.: 9.00
## Median :10.000 Median :10.000 Median :10.00
## Mean : 9.733 Mean : 9.702 Mean : 9.33
## 3rd Qu.:10.000 3rd Qu.:10.000 3rd Qu.:10.00
## Max. :10.000 Max. :10.000 Max. :10.00
##
## calculated_host_listings_count calculated_host_listings_count_entire_homes
## Min. : 1.000 Min. : 0.00
## 1st Qu.: 1.000 1st Qu.: 0.00
## Median : 1.000 Median : 1.00
## Mean : 5.224 Mean : 4.23
## 3rd Qu.: 2.000 3rd Qu.: 1.00
## Max. :136.000 Max. :136.00
##
## calculated_host_listings_count_private_rooms
## Min. : 0.0000
## 1st Qu.: 0.0000
## Median : 0.0000
## Mean : 0.8863
## 3rd Qu.: 1.0000
## Max. :77.0000
##
## calculated_host_listings_count_shared_rooms reviews_per_month bathsum
## Min. : 0.00000 Min. : 0.0100 Min. : 0.000
## 1st Qu.: 0.00000 1st Qu.: 0.0700 1st Qu.: 1.000
## Median : 0.00000 Median : 0.2000 Median : 1.000
## Mean : 0.06477 Mean : 0.6241 Mean : 1.368
## 3rd Qu.: 0.00000 3rd Qu.: 0.7600 3rd Qu.: 1.500
## Max. :17.00000 Max. :11.0600 Max. :18.000
##
## bathclass istop100 access across
## Min. :0.0000 Min. :0.000000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.000000 Median :0.5000 Median :0.00000
## Mean :0.1909 Mean :0.009785 Mean :0.7841 Mean :0.06556
## 3rd Qu.:0.0000 3rd Qu.:0.000000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.000000 Max. :6.0000 Max. :3.00000
##
## after aid air airport
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.07035 Mean :0.3409 Mean :0.6273 Mean :0.1136
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :3.00000 Max. :3.0000 Max. :5.0000 Max. :6.0000
##
## airy alarm all allowed
## Min. :0.00000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :1.000 Median :0.0000 Median :0.0000
## Mean :0.06487 Mean :1.151 Mean :0.5242 Mean :0.4587
## 3rd Qu.:0.00000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :3.00000 Max. :3.000 Max. :6.0000 Max. :3.0000
##
## also amazing amenities and
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.: 4.000
## Median :0.0000 Median :0.00000 Median :0.00000 Median : 6.000
## Mean :0.2651 Mean :0.07759 Mean :0.08982 Mean : 6.028
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.: 8.000
## Max. :6.0000 Max. :6.00000 Max. :3.00000 Max. :19.000
##
## any apartment appliances are
## Min. :0.00000 Min. : 0.000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.: 0.000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median : 2.000 Median :0.00000 Median :0.0000
## Mean :0.06125 Mean : 1.855 Mean :0.07906 Mean :0.6304
## 3rd Qu.:0.00000 3rd Qu.: 3.000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :4.00000 Max. :10.000 Max. :3.00000 Max. :9.0000
##
## area areas around art
## Min. :0.0000 Min. :0.000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.000 Median :0.00000 Median :0.00000
## Mean :0.4422 Mean :0.118 Mean :0.08933 Mean :0.08239
## 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :7.0000 Max. :3.000 Max. :4.00000 Max. :5.00000
##
## australia available away back
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.8751 Mean :0.2576 Mean :0.2644 Mean :0.1049
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :3.0000 Max. :5.0000 Max. :5.0000 Max. :5.0000
##
## backyard balcony bars basics
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2926 Mean :0.6933 Mean :0.1434 Mean :0.4524
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :5.0000 Max. :7.0000 Max. :3.0000 Max. :2.0000
##
## bath bathroom bathrooms bathtub
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1353 Mean :0.4984 Mean :0.1019 Mean :0.1369
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :4.0000 Max. :6.0000 Max. :3.0000 Max. :3.0000
##
## bay bbq beach beaches
## Min. :0.0000 Min. :0.0000 Min. : 0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median : 0.0000 Median :0.0000
## Mean :0.1233 Mean :0.3078 Mean : 0.9493 Mean :0.1593
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.: 2.0000 3rd Qu.:0.0000
## Max. :7.0000 Max. :4.0000 Max. :14.0000 Max. :6.0000
##
## beautiful bed bedroom bedrooms.1
## Min. :0.0000 Min. : 0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 0.000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median : 1.000 Median :1.000 Median :0.0000
## Mean :0.3021 Mean : 1.073 Mean :1.101 Mean :0.2669
## 3rd Qu.:0.0000 3rd Qu.: 2.000 3rd Qu.:2.000 3rd Qu.:0.0000
## Max. :5.0000 Max. :10.000 Max. :9.000 Max. :5.0000
##
## beds.1 been before best
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.1065 Mean :0.05724 Mean :0.07153 Mean :0.1273
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :5.0000 Max. :3.00000 Max. :4.00000 Max. :4.0000
##
## big blankets block bondi
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.0954 Mean :0.2869 Mean :0.06859 Mean :0.5107
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :5.0000 Max. :3.0000 Max. :3.00000 Max. :9.0000
##
## books both brand breakfast
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.1108 Mean :0.09393 Mean :0.1019 Mean :0.1875
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :3.0000 Max. :5.00000 Max. :9.0000 Max. :6.0000
##
## bright building built bus
## Min. :0.0000 Min. :0.000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.000 Median :0.0000
## Mean :0.1538 Mean :0.186 Mean :0.125 Mean :0.2858
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :4.0000 Max. :5.000 Max. :4.000 Max. :8.0000
##
## buses business but cable
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.000
## Mean :0.08043 Mean :0.07427 Mean :0.1527 Mean :0.195
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :4.00000 Max. :3.00000 Max. :4.0000 Max. :5.000
##
## cafe cafes can car
## Min. :0.00000 Min. :0.000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.000 Median :0.0000 Median :0.00000
## Mean :0.05675 Mean :0.289 Mean :0.3361 Mean :0.09119
## 3rd Qu.:0.00000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :3.00000 Max. :3.000 Max. :6.0000 Max. :3.00000
##
## carbon cbd central centre
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2352 Mean :0.2724 Mean :0.1474 Mean :0.1303
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :2.0000 Max. :5.0000 Max. :5.0000 Max. :5.0000
##
## chair children city clean
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1091 Mean :0.1867 Mean :0.5178 Mean :0.1442
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :4.0000 Max. :6.0000 Max. :7.0000 Max. :5.0000
##
## cleaning close coffee comfortable
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.06585 Mean :0.3912 Mean :0.3832 Mean :0.2291
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :4.00000 Max. :6.0000 Max. :6.0000 Max. :4.0000
##
## comfy conditioning connection convenient
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.06203 Mean :0.5491 Mean :0.06213 Mean :0.07769
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :4.00000 Max. :4.0000 Max. :2.00000 Max. :3.00000
##
## coogee cooking cosy couple
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1045 Mean :0.5037 Mean :0.08415 Mean :0.0682
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :7.0000 Max. :4.0000 Max. :4.00000 Max. :6.0000
##
## couples courtyard crib darkening
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.07847 Mean :0.08845 Mean :0.1211 Mean :0.08072
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :2.00000 Max. :4.00000 Max. :4.0000 Max. :1.00000
##
## darling day deck dining
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.08004 Mean :0.09961 Mean :0.07652 Mean :0.2669
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :9.00000 Max. :4.00000 Max. :5.00000 Max. :5.0000
##
## dishes dishwasher distance door
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.5016 Mean :0.4085 Mean :0.151 Mean :0.2169
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :3.0000 Max. :3.0000 Max. :5.000 Max. :4.0000
##
## double down downstairs drive
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.2556 Mean :0.05362 Mean :0.06546 Mean :0.09119
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :6.0000 Max. :3.00000 Max. :3.00000 Max. :6.00000
##
## dropoff dryer easy elevator
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :2.000 Median :0.0000 Median :0.0000
## Mean :0.1801 Mean :1.456 Mean :0.1423 Mean :0.2532
## 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :4.000 Max. :4.0000 Max. :3.0000
##
## email enjoy ensuite entire
## Min. :0.000 Min. :0.0000 Min. :0.00000 Min. :0.00
## 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00
## Median :1.000 Median :0.0000 Median :0.00000 Median :1.00
## Mean :1.113 Mean :0.2552 Mean :0.09863 Mean :0.74
## 3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:1.00
## Max. :4.000 Max. :6.0000 Max. :5.00000 Max. :4.00
##
## entrance equipped essentials etc
## Min. :0.0000 Min. :0.000 Min. :0.000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.00000
## Median :0.0000 Median :0.000 Median :1.000 Median :0.00000
## Mean :0.3307 Mean :0.196 Mean :1.012 Mean :0.06135
## 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.00000
## Max. :4.0000 Max. :5.000 Max. :4.000 Max. :5.00000
##
## ethernet everything extinguisher extra
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.05616 Mean :0.1521 Mean :0.3242 Mean :0.3367
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :2.00000 Max. :4.0000 Max. :1.0000 Max. :4.0000
##
## facebook facilities families family
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1984 Mean :0.1179 Mean :0.06761 Mean :0.2265
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :4.0000 Max. :3.00000 Max. :7.0000
##
## famous features feel ferry
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.08405 Mean :0.1023 Mean :0.0955 Mean :0.07916
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :3.00000 Max. :4.0000 Max. :5.0000 Max. :5.00000
##
## few filled fire fireplace
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.05949 Mean :0.1114 Mean :0.3356 Mean :0.09227
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :3.00000 Max. :4.0000 Max. :3.0000 Max. :4.00000
##
## first flat floor for.
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.000 Median :1.000
## Mean :0.3821 Mean :0.1078 Mean :0.224 Mean :1.192
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:2.000
## Max. :7.0000 Max. :6.0000 Max. :6.000 Max. :9.000
##
## free fridge friendly from
## Min. : 0.00 Min. :0.00000 Min. :0.000 Min. : 0.0000
## 1st Qu.: 0.00 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.: 0.0000
## Median : 1.00 Median :0.00000 Median :1.000 Median : 0.0000
## Mean : 1.02 Mean :0.09736 Mean :0.772 Mean : 0.7746
## 3rd Qu.: 2.00 3rd Qu.:0.00000 3rd Qu.:1.000 3rd Qu.: 1.0000
## Max. :10.00 Max. :3.00000 Max. :5.000 Max. :12.0000
##
## front full fully furnished
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1154 Mean :0.1728 Mean :0.2789 Mean :0.1469
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :5.0000 Max. :5.0000 Max. :6.0000 Max. :4.0000
##
## garden gas gel get
## Min. : 0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.: 0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median : 0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean : 0.4385 Mean :0.07857 Mean :0.09159 Mean :0.07143
## 3rd Qu.: 1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :10.0000 Max. :4.00000 Max. :2.00000 Max. :4.00000
##
## good government great greets
## Min. :0.00000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :1.000 Median :0.0000 Median :0.0000
## Mean :0.08943 Mean :1.277 Mean :0.2699 Mean :0.1464
## 3rd Qu.:0.00000 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :4.00000 Max. :3.000 Max. :5.0000 Max. :2.0000
##
## grill ground guest guests
## Min. :0.0000 Min. :0.00000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.000 Median :0.0000
## Mean :0.1503 Mean :0.06468 Mean :0.507 Mean :0.2324
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :2.0000 Max. :4.00000 Max. :7.000 Max. :7.0000
##
## gym hair hangers harbour
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:1.0000 1st Qu.: 0.0000
## Median :0.0000 Median :1.000 Median :1.0000 Median : 0.0000
## Mean :0.1838 Mean :0.713 Mean :0.8125 Mean : 0.2337
## 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.: 0.0000
## Max. :5.0000 Max. :3.000 Max. :3.0000 Max. :11.0000
##
## has have heart heating
## Min. :0.0000 Min. : 0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median : 0.0000 Median :0.0000 Median :1.0000
## Mean :0.5863 Mean : 0.5109 Mean :0.1796 Mean :0.6841
## 3rd Qu.:1.0000 3rd Qu.: 1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :8.0000 Max. :10.0000 Max. :3.0000 Max. :4.0000
##
## high hills holiday home
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.: 0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median : 0.0000
## Mean :0.2105 Mean :0.09393 Mean :0.07808 Mean : 0.6689
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.: 1.0000
## Max. :4.0000 Max. :5.00000 Max. :4.00000 Max. :12.0000
##
## host hot hotel house
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.: 0.0000
## Median :0.0000 Median :1.000 Median :0.0000 Median : 0.0000
## Mean :0.1703 Mean :0.726 Mean :0.0636 Mean : 0.9113
## 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.: 1.0000
## Max. :3.0000 Max. :5.000 Max. :6.0000 Max. :10.0000
##
## huge ideal identity includes
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.07387 Mean :0.07045 Mean :0.2944 Mean :0.0638
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :5.00000 Max. :3.00000 Max. :1.0000 Max. :4.0000
##
## including indoor internal internet
## Min. :0.000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.123 Mean :0.1161 Mean :0.06556 Mean :0.07877
## 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :4.000 Max. :4.0000 Max. :3.00000 Max. :3.00000
##
## into iron its jumio
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :1.0000 Median :0.0000 Median :1.0000
## Mean :0.09119 Mean :0.7967 Mean :0.1034 Mean :0.6832
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :5.00000 Max. :3.0000 Max. :5.0000 Max. :1.0000
##
## junction just king kit
## Min. :0.00000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.08415 Mean :0.2632 Mean :0.127 Mean :0.3418
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:1.0000
## Max. :5.00000 Max. :6.0000 Max. :5.000 Max. :4.0000
##
## kitchen laptop large laundry
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :2.000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :1.633 Mean :0.6645 Mean :0.4742 Mean :0.2444
## 3rd Qu.:2.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :6.000 Max. :2.0000 Max. :7.0000 Max. :5.0000
##
## leafy level light like
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.08033 Mean :0.2143 Mean :0.2474 Mean :0.09775
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :3.00000 Max. :5.0000 Max. :5.0000 Max. :4.00000
##
## linen linens living local
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.09002 Mean :0.4167 Mean :0.5711 Mean :0.1132
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :3.00000 Max. :3.0000 Max. :6.0000 Max. :4.0000
##
## located location lock lockbox
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.3774 Mean :0.291 Mean :0.1441 Mean :0.1956
## 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :5.0000 Max. :7.000 Max. :4.0000 Max. :3.0000
##
## long looking lots lounge
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.3226 Mean :0.06321 Mean :0.05998 Mean :0.1918
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :4.0000 Max. :4.00000 Max. :4.00000 Max. :6.0000
##
## love lovely luggage luxury
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.09462 Mean :0.1286 Mean :0.1877 Mean :0.07358
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :4.00000 Max. :5.0000 Max. :2.0000 Max. :3.00000
##
## machine main make maker
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1531 Mean :0.1226 Mean :0.07681 Mean :0.2652
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :5.0000 Max. :5.0000 Max. :4.00000 Max. :4.0000
##
## manly manual many master
## Min. : 0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00
## 1st Qu.: 0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00
## Median : 0.0000 Median :0.0000 Median :0.0000 Median :0.00
## Mean : 0.2197 Mean :0.3267 Mean :0.0683 Mean :0.11
## 3rd Qu.: 0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00
## Max. :10.0000 Max. :3.0000 Max. :3.0000 Max. :5.00
##
## microwave min mins minute
## Min. :0.0000 Min. : 0.0000 Min. : 0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.:0.0000
## Median :0.0000 Median : 0.0000 Median : 0.0000 Median :0.0000
## Mean :0.5262 Mean : 0.2716 Mean : 0.2573 Mean :0.2136
## 3rd Qu.:1.0000 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.:0.0000
## Max. :4.0000 Max. :12.0000 Max. :10.0000 Max. :7.0000
##
## minutes modern monoxide more
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.3776 Mean :0.3717 Mean :0.2351 Mean :0.07994
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :8.0000 Max. :6.0000 Max. :2.0000 Max. :3.00000
##
## most natural near need
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.08904 Mean :0.06106 Mean :0.09834 Mean :0.1647
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :4.00000 Max. :3.00000 Max. :5.00000 Max. :4.0000
##
## netflix new newly next.
## Min. :0.00000 Min. : 0.000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.: 1.000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median : 1.000 Median :0.00000 Median :0.00000
## Mean :0.08239 Mean : 1.091 Mean :0.07759 Mean :0.05959
## 3rd Qu.:0.00000 3rd Qu.: 1.000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :3.00000 Max. :13.000 Max. :3.00000 Max. :3.00000
##
## nice night north not
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.07573 Mean :0.05793 Mean :0.1331 Mean :0.1335
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :5.00000 Max. :4.00000 Max. :7.0000 Max. :5.0000
##
## note ocean off offer
## Min. :0.000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.237 Mean :0.09883 Mean :0.1471 Mean :0.07485
## 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :4.000 Max. :6.00000 Max. :3.0000 Max. :3.00000
##
## offers offline one only
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.0771 Mean :0.5101 Mean :0.4055 Mean :0.2705
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :5.0000 Max. :2.0000 Max. :8.0000 Max. :5.0000
##
## open other our out
## Min. :0.0000 Min. :0.0000 Min. : 0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median : 0.0000 Median :0.0000
## Mean :0.2236 Mean :0.2919 Mean : 0.4974 Mean :0.1906
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.: 1.0000 3rd Qu.:0.0000
## Max. :5.0000 Max. :5.0000 Max. :10.0000 Max. :5.0000
##
## outdoor outside oven over
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.1868 Mean :0.07006 Mean :0.4921 Mean :0.1006
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :7.0000 Max. :4.00000 Max. :4.0000 Max. :6.0000
##
## own pack paid park
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.1854 Mean :0.06556 Mean :0.0998 Mean :0.2452
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :6.0000 Max. :2.00000 Max. :3.0000 Max. :9.0000
##
## parking parks patio peaceful
## Min. :0.00 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :1.00 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :1.16 Mean :0.06781 Mean :0.3603 Mean :0.06272
## 3rd Qu.:2.00 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :8.00 Max. :3.00000 Max. :5.0000 Max. :3.00000
##
## people perfect phone pillows
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.000 Median :0.0000
## Mean :0.1068 Mean :0.2652 Mean :1.002 Mean :0.2954
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :5.0000 Max. :5.0000 Max. :3.000 Max. :2.0000
##
## place plan play please
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.2826 Mean :0.1347 Mean :0.08268 Mean :0.1417
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :6.0000 Max. :5.0000 Max. :3.00000 Max. :5.0000
##
## plenty plus pool premises
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :1.0000
## Mean :0.1077 Mean :0.05871 Mean :0.4327 Mean :0.5378
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :4.0000 Max. :5.00000 Max. :8.0000 Max. :4.0000
##
## private property provided public
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :1.114 Mean :0.1018 Mean :0.1023 Mean :0.1186
## 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :8.000 Max. :5.0000 Max. :4.0000 Max. :3.0000
##
## quality queen quiet refrigerator
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.06869 Mean :0.3068 Mean :0.2841 Mean :0.503
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.000
## Max. :4.00000 Max. :5.0000 Max. :4.0000 Max. :3.000
##
## relax relaxing renovated restaurants
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.09755 Mean :0.06223 Mean :0.1662 Mean :0.3567
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :3.00000 Max. :3.00000 Max. :4.0000 Max. :6.0000
##
## reviews right road room
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. : 0.000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.: 0.000
## Median :1.0000 Median :0.00000 Median :0.0000 Median : 1.000
## Mean :0.7418 Mean :0.09599 Mean :0.0817 Mean : 1.416
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.: 2.000
## Max. :3.0000 Max. :4.00000 Max. :4.0000 Max. :11.000
##
## rooms second secure self
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.1108 Mean :0.0862 Mean :0.07613 Mean :0.07583
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :6.0000 Max. :4.0000 Max. :3.00000 Max. :4.00000
##
## selfie separate set shades
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.3174 Mean :0.1107 Mean :0.07544 Mean :0.08112
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :2.0000 Max. :5.0000 Max. :3.00000 Max. :1.00000
##
## shampoo share shared shopping
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.7204 Mean :0.06155 Mean :0.1475 Mean :0.1584
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :3.0000 Max. :6.00000 Max. :8.0000 Max. :4.0000
##
## shops short shower silverware
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.2494 Mean :0.1689 Mean :0.2205 Mean :0.497
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.000
## Max. :4.0000 Max. :4.0000 Max. :4.0000 Max. :2.000
##
## single situated size sized
## Min. :0.000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.278 Mean :0.07945 Mean :0.1728 Mean :0.08327
## 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :5.000 Max. :3.00000 Max. :6.0000 Max. :5.00000
##
## small smoke sofa some
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :1.0000 Median :0.0000 Median :0.00000
## Mean :0.1262 Mean :0.9178 Mean :0.1069 Mean :0.07045
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :5.0000 Max. :3.0000 Max. :5.0000 Max. :4.00000
##
## south space spacious station
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:1.0000 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.000 Median :0.0000 Median :0.0000
## Mean :0.8326 Mean :1.103 Mean :0.3282 Mean :0.3072
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :4.0000 Max. :6.000 Max. :4.0000 Max. :6.0000
##
## stay stays stop stove
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2445 Mean :0.2959 Mean :0.1023 Mean :0.4726
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :5.0000 Max. :6.0000 Max. :3.0000 Max. :5.0000
##
## street stroll studio stunning
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :1.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.6623 Mean :0.07916 Mean :0.09638 Mean :0.08483
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :6.0000 Max. :3.00000 Max. :8.00000 Max. :4.00000
##
## style stylish suite summer
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09912 Mean :0.07671 Mean :0.07309 Mean :0.06047
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :4.00000 Max. :4.00000 Max. :8.00000 Max. :5.00000
##
## sun sunny surry swimming
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1079 Mean :0.1313 Mean :0.08376 Mean :0.0681
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :4.0000 Max. :6.0000 Max. :5.00000 Max. :5.0000
##
## sydney table take tea
## Min. :0.000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :1.000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :1.109 Mean :0.1314 Mean :0.06996 Mean :0.05323
## 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :9.000 Max. :4.0000 Max. :3.00000 Max. :3.00000
##
## term terrace than that
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.3016 Mean :0.1453 Mean :0.06243 Mean :0.2633
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :4.0000 Max. :5.0000 Max. :4.00000 Max. :5.0000
##
## the there things this
## Min. : 0.000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.: 3.000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median : 6.000 Median :0.0000 Median :0.000 Median :0.0000
## Mean : 5.997 Mean :0.3977 Mean :0.208 Mean :0.6754
## 3rd Qu.: 9.000 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:1.0000
## Max. :29.000 Max. :7.0000 Max. :3.000 Max. :7.0000
##
## three throughout time top
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.06517 Mean :0.06419 Mean :0.07534 Mean :0.1288
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :6.00000 Max. :3.00000 Max. :5.00000 Max. :5.0000
##
## towels toys train transport
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.09814 Mean :0.1105 Mean :0.2478 Mean :0.1772
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :4.00000 Max. :4.0000 Max. :5.0000 Max. :3.0000
##
## travel tub two u2019n
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.08337 Mean :0.09765 Mean :0.3391 Mean :0.06223
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :3.00000 Max. :4.00000 Max. :8.0000 Max. :1.00000
##
## u2019s unit unlimited use
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1468 Mean :0.1108 Mean :0.0547 Mean :0.2044
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :2.0000 Max. :6.0000 Max. :3.0000 Max. :6.0000
##
## utensils very view views
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.05704 Mean :0.2566 Mean :0.1307 Mean :0.3091
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :2.00000 Max. :7.0000 Max. :6.0000 Max. :9.0000
##
## village wales walk walking
## Min. :0.00000 Min. :0.0000 Min. : 0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:1.0000 1st Qu.: 0.0000 1st Qu.:0.0000
## Median :0.00000 Median :1.0000 Median : 0.0000 Median :0.0000
## Mean :0.06712 Mean :0.8166 Mean : 0.7417 Mean :0.1874
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.: 1.0000 3rd Qu.:0.0000
## Max. :4.00000 Max. :4.0000 Max. :12.0000 Max. :6.0000
##
## want wardrobe washer washing
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.05519 Mean :0.1037 Mean :0.9711 Mean :0.1228
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :4.00000 Max. :3.0000 Max. :4.0000 Max. :4.0000
##
## water welcome well where
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.6888 Mean :0.1146 Mean :0.1782 Mean :0.07074
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :6.0000 Max. :3.0000 Max. :5.0000 Max. :5.00000
##
## which while. who whole
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.1785 Mean :0.06448 Mean :0.06517 Mean :0.06732
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :4.0000 Max. :3.00000 Max. :3.00000 Max. :3.00000
##
## wifi will with within
## Min. :0.000 Min. :0.0000 Min. : 0.000 Min. :0.0000
## 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.: 1.000 1st Qu.:0.0000
## Median :1.000 Median :0.0000 Median : 2.000 Median :0.0000
## Mean :1.155 Mean :0.4098 Mean : 2.389 Mean :0.1311
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.: 3.000 3rd Qu.:0.0000
## Max. :5.000 Max. :7.0000 Max. :15.000 Max. :4.0000
##
## work workspace you your
## Min. :0.0000 Min. :0.0000 Min. : 0.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.: 0.000 1st Qu.: 0.0000
## Median :0.0000 Median :1.0000 Median : 1.000 Median : 0.0000
## Mean :0.2168 Mean :0.6635 Mean : 1.338 Mean : 0.5348
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.: 2.000 3rd Qu.: 1.0000
## Max. :3.0000 Max. :2.0000 Max. :13.000 Max. :10.0000
##
# Print a compact overview of df: its dimensions and, for every column,
# the storage type and the first few values.
str(df)
## 'data.frame': 10220 obs. of 434 variables:
## $ id : int 24456644 23737008 40868452 21988667 8912495 39766755 43884965 8598248 29813566 21270682 ...
## $ host_is_superhost : Factor w/ 2 levels "f","t": 1 1 1 1 1 1 1 1 1 2 ...
## $ host_has_profile_pic : Factor w/ 2 levels "f","t": 2 2 2 2 2 2 2 2 2 2 ...
## $ host_identity_verified : Factor w/ 2 levels "f","t": 2 2 2 1 2 2 2 2 2 2 ...
## $ neighbourhood_cleansed : Factor w/ 38 levels "Ashfield","Auburn",..: 5 33 33 28 33 33 27 28 36 27 ...
## $ instant_bookable : Factor w/ 2 levels "f","t": 1 2 1 2 1 2 2 2 2 1 ...
## $ room_type : Factor w/ 4 levels "Entire home/apt",..: 1 1 3 3 3 1 1 1 1 1 ...
## $ host_listings_count : int 14 1 0 1 1 118 1 1 1 2 ...
## $ host_total_listings_count : int 14 1 0 1 1 118 1 1 1 2 ...
## $ latitude : num -33.9 -33.9 -33.9 -33.9 -33.9 ...
## $ longitude : num 151 151 151 151 151 ...
## $ accommodates : int 4 4 2 1 1 3 2 4 3 6 ...
## $ bedrooms : int 1 2 1 1 1 1 1 1 1 3 ...
## $ beds : int 1 2 1 1 1 2 1 2 1 3 ...
## $ price : int 200 150 55 45 49 112 281 248 101 500 ...
## $ minimum_nights : int 1 2 7 3 1 3 2 10 3 7 ...
## $ maximum_nights : int 90 365 20 60 1125 1125 7 14 1125 1125 ...
## $ minimum_minimum_nights : int 1 2 7 3 1 2 2 5 3 7 ...
## $ maximum_minimum_nights : int 1 2 7 3 1 5 2 10 3 7 ...
## $ minimum_maximum_nights : int 90 365 20 60 1125 1125 1125 14 1125 1125 ...
## $ maximum_maximum_nights : int 90 365 20 60 1125 1125 1125 14 1125 1125 ...
## $ minimum_nights_avg_ntm : num 1 2 7 3 1 3.1 2 8.6 3 7 ...
## $ maximum_nights_avg_ntm : num 90 365 20 60 1125 ...
## $ availability_30 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ availability_60 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ availability_90 : int 0 0 0 0 0 0 2 0 0 0 ...
## $ availability_365 : int 0 0 0 0 0 126 2 0 0 0 ...
## $ number_of_reviews_ltm : int 0 0 2 0 0 5 13 0 5 3 ...
## $ number_of_reviews_l30d : int 0 0 0 0 0 0 1 0 0 0 ...
## $ review_scores_rating : int 94 96 90 90 90 92 100 98 92 98 ...
## $ review_scores_accuracy : int 10 10 10 9 10 10 10 10 9 10 ...
## $ review_scores_cleanliness : int 9 10 8 8 9 9 10 10 9 10 ...
## $ review_scores_checkin : int 9 9 10 10 10 10 10 10 10 10 ...
## $ review_scores_communication : int 9 10 10 10 10 10 10 10 10 10 ...
## $ review_scores_location : int 9 10 8 8 9 10 10 10 10 10 ...
## $ review_scores_value : int 9 10 9 9 9 8 10 10 9 10 ...
## $ calculated_host_listings_count : int 14 1 1 1 1 113 1 1 1 1 ...
## $ calculated_host_listings_count_entire_homes : int 9 1 0 0 0 113 1 1 1 1 ...
## $ calculated_host_listings_count_private_rooms: int 5 0 1 1 1 0 0 0 0 0 ...
## $ calculated_host_listings_count_shared_rooms : int 0 0 0 0 0 0 0 0 0 0 ...
## $ reviews_per_month : num 1.34 1.68 0.2 0.06 0.03 0.43 3.1 0.16 2.04 0.29 ...
## $ bathsum : num 1 2 1 1 2 1 1 1 1 2 ...
## $ bathclass : int 0 0 1 1 0 0 0 0 0 0 ...
## $ istop100 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ access : int 1 3 0 1 0 0 0 2 2 0 ...
## $ across : int 0 0 0 0 0 0 0 0 0 0 ...
## $ after : int 0 0 0 0 0 0 0 0 0 0 ...
## $ aid : int 0 1 0 0 0 1 0 0 0 1 ...
## $ air : int 0 2 0 0 2 0 2 0 0 1 ...
## $ airport : int 0 2 0 0 0 0 0 0 0 0 ...
## $ airy : int 0 0 0 0 0 0 0 0 0 0 ...
## $ alarm : int 0 1 1 1 0 2 1 1 1 1 ...
## $ all : int 0 0 2 1 0 1 0 0 0 0 ...
## $ allowed : int 1 2 0 0 0 0 0 0 0 1 ...
## $ also : int 0 1 0 0 0 0 0 0 0 0 ...
## $ amazing : int 0 0 0 0 0 0 0 0 0 0 ...
## $ amenities : int 0 0 0 0 0 0 0 0 0 0 ...
## $ and : int 2 8 5 2 0 10 1 13 5 10 ...
## $ any : int 0 0 0 0 0 0 0 0 0 0 ...
## $ apartment : int 2 2 0 1 0 3 2 2 4 0 ...
## $ appliances : int 0 0 0 0 0 1 0 0 0 0 ...
## $ are : int 0 2 1 0 0 1 0 0 0 1 ...
## $ area : int 0 1 0 0 1 0 0 1 0 0 ...
## $ areas : int 0 1 0 0 0 0 0 1 1 0 ...
## $ around : int 0 0 0 0 0 1 0 1 0 0 ...
## $ art : int 0 0 0 0 0 1 0 0 0 0 ...
## $ australia : int 1 1 1 1 0 1 1 1 1 0 ...
## $ available : int 0 0 1 1 0 0 0 1 0 0 ...
## $ away : int 0 0 1 0 0 0 0 0 0 0 ...
## $ back : int 0 0 0 0 0 0 0 0 1 1 ...
## $ backyard : int 1 0 1 0 0 0 0 0 0 1 ...
## $ balcony : int 1 1 0 2 0 0 0 3 0 1 ...
## $ bars : int 0 0 0 0 0 0 0 0 0 0 ...
## $ basics : int 1 1 0 0 0 1 0 1 1 1 ...
## $ bath : int 0 0 0 0 0 0 0 1 0 1 ...
## $ bathroom : int 0 0 0 0 1 1 0 2 0 0 ...
## $ bathrooms : int 0 1 0 0 0 0 0 0 0 0 ...
## $ bathtub : int 0 1 0 0 0 1 0 0 0 0 ...
## $ bay : int 0 0 0 0 0 0 0 0 0 0 ...
## $ bbq : int 1 0 0 0 0 0 2 0 0 2 ...
## $ beach : int 0 0 0 0 0 1 2 1 6 3 ...
## $ beaches : int 0 0 0 0 0 0 1 0 0 1 ...
## $ beautiful : int 0 0 1 0 0 0 1 0 0 0 ...
## $ bed : int 1 2 1 1 1 2 1 3 1 1 ...
## $ bedroom : int 0 1 0 5 3 0 0 5 2 0 ...
## $ bedrooms.1 : int 0 1 2 0 0 0 0 0 0 0 ...
## $ beds.1 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ been : int 0 0 0 0 0 1 0 0 1 0 ...
## $ before : int 0 0 0 0 0 0 0 0 0 0 ...
## $ best : int 0 0 0 0 0 1 0 0 0 1 ...
## $ big : int 0 0 0 2 0 0 0 0 0 0 ...
## $ blankets : int 1 1 0 1 0 0 1 0 1 1 ...
## $ block : int 0 0 0 0 0 1 0 1 0 0 ...
## $ bondi : int 0 0 0 0 0 1 0 1 4 0 ...
## $ books : int 0 1 0 0 0 0 0 1 0 1 ...
## $ both : int 0 0 0 0 0 0 0 0 0 0 ...
## $ brand : int 0 0 0 0 0 0 0 0 0 0 ...
## $ breakfast : int 0 0 0 0 0 0 0 0 0 1 ...
## $ bright : int 0 0 1 0 0 1 0 0 0 0 ...
## [list output truncated]
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
# Factor variables (columns 2-7): for each one, write "<index><name>.png"
# holding a horizontal bar chart of the share of top-100 vs. other listings
# within every level of the factor.
for (i in 2:7) {
  col_name <- names(df)[i]
  png(paste0(i, col_name, ".png"))
  # Close the device even when rendering fails; otherwise a single bad column
  # leaves the png device open and every later plot is drawn into it.
  tryCatch(
    print(
      ggplot() +
        geom_bar(
          aes(x = df[[i]], fill = as.factor(df$istop100)),
          position = "fill"
        ) +
        # Explicit legend title instead of the deparsed aes() expression.
        labs(x = col_name, fill = "istop100") +
        coord_flip()
    ),
    finally = dev.off()
  )
}
# Numerical variables (columns 8-43): for each one, write
# "<index>.0<name>.png" holding density curves of the raw values, one curve
# per istop100 group.
for (i in 8:43) {
  col_name <- names(df)[i]
  png(paste0(i, ".0", col_name, ".png"))
  # Close the device even when rendering fails; otherwise a single bad column
  # leaves the png device open and every later plot is drawn into it.
  tryCatch(
    print(
      ggplot() +
        geom_density(aes(x = df[[i]], color = as.factor(df$istop100))) +
        # Explicit legend title instead of the deparsed aes() expression.
        labs(x = paste(i, col_name, sep = " "), color = "istop100")
    ),
    finally = dev.off()
  )
}
# Log-transformed numerical variables: for the selected columns, write
# "<index>.1<name>.png" holding density curves of log(1 + value), one curve
# per istop100 group.
for (i in c(8, 9, 13, 15, 16, 18, 19, 20, 21, 22, 23, 29, 37, 38, 39, 40, 42)) {
  col_name <- names(df)[i]
  png(paste0(i, ".1", col_name, ".png"))
  # Close the device even when rendering fails; otherwise a single bad column
  # leaves the png device open and every later plot is drawn into it.
  tryCatch(
    print(
      ggplot() +
        # log1p(x) is the numerically stable form of log(x + 1).
        geom_density(aes(x = log1p(df[[i]]), color = as.factor(df$istop100))) +
        # Explicit legend title instead of the deparsed aes() expression.
        labs(x = paste(i, "log", col_name, sep = " "), color = "istop100")
    ),
    finally = dev.off()
  )
}
# Text (word-count) variables: every column after the 44th (istop100). For
# each one, write "<index><name>.png" holding a horizontal bar chart of the
# share of top-100 vs. other listings at every observed word count.
# The range is derived from ncol(df) instead of a hard-coded 434, so it stays
# correct if the vocabulary size changes and is empty when there are no word
# columns.
text_cols <- seq_len(ncol(df))[-seq_len(44)]
for (i in text_cols) {
  col_name <- names(df)[i]
  png(paste0(i, col_name, ".png"))
  # Close the device even when rendering fails; otherwise a single bad column
  # leaves the png device open and every later plot is drawn into it.
  tryCatch(
    print(
      ggplot() +
        geom_bar(
          aes(x = df[[i]], fill = as.factor(df$istop100)),
          position = "fill"
        ) +
        # Explicit legend title instead of the deparsed aes() expression.
        labs(x = paste(i, col_name, sep = " "), fill = "istop100") +
        coord_flip()
    ),
    finally = dev.off()
  )
}
#Load drawing-related packages
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 3.6.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## √ tibble 3.0.4 √ purrr 0.3.4
## √ tidyr 1.1.2 √ dplyr 1.0.2
## √ readr 1.4.0 √ forcats 0.5.0
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'readr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x ggplot2::annotate() masks NLP::annotate()
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks jsonlite::flatten()
## x dplyr::lag() masks stats::lag()
library("jsonlite")
library("ggplot2")
library(plyr)
## Warning: package 'plyr' was built under R version 3.6.3
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
library(dplyr)
library(leaflet)
## Warning: package 'leaflet' was built under R version 3.6.3
# Read the map data in neighbourhoods.geojson: join the file's lines into one
# string and parse it into a nested list (no simplification to vectors).
geojson_text <- paste(
  readLines("neighbourhoods.geojson", warn = FALSE),
  collapse = "\n"
)
geoData2 <- fromJSON(geojson_text, simplifyVector = FALSE)
# Listings flagged as top 100 (istop100 == 1)
top_100 <- filter(df, istop100 == 1)
# All remaining listings (istop100 == 0)
other <- filter(df, istop100 == 0)
# Draw the map: start from a view centred on (151.1, -33.8) at zoom 10, add
# the non-top-100 listings as small green dots, the top-100 listings as
# larger red dots, then the neighbourhood GeoJSON layer.
listing_map <- setView(leaflet(), lng = 151.1, lat = -33.8, zoom = 10)
listing_map <- addCircleMarkers(
  listing_map,
  lng = other$longitude, lat = other$latitude,
  radius = 2, stroke = FALSE, color = "green", fillOpacity = 0.5,
  group = "Other"
)
listing_map <- addCircleMarkers(
  listing_map,
  lng = top_100$longitude, lat = top_100$latitude,
  radius = 3, stroke = FALSE, color = "red", fillOpacity = 0.9,
  group = "Top 100"
)
listing_map <- addGeoJSON(listing_map, geoData2)
# Evaluating the widget at top level renders it.
listing_map
# Stratified 60/40 train/test split, re-seeding with 123 before each draw:
# 60% of the top-100 listings and 60% of the other listings form dftrain,
# and the remaining rows of each stratum form dftest.
dfistop100 <- subset(df, df$istop100 == 1)
dfnotop100 <- subset(df, df$istop100 == 0)
n_top <- nrow(dfistop100)
n_other <- nrow(dfnotop100)
set.seed(123)
# Size the draw from the stratum itself instead of assuming exactly 100 rows:
# with fewer rows, sample(100, 60) would index past the end and put all-NA
# rows into dftrain. For a 100-row stratum this is the same draw as before.
sam1 <- sample(n_top, n_top * 0.6)
set.seed(123)
sam2 <- sample(n_other, n_other * 0.6)
dftrain <- rbind(dfistop100[sam1, ], dfnotop100[sam2, ])
# Take the complement with setdiff() rather than negative indexing: x[-sam, ]
# returns zero rows when sam is empty, silently dropping the whole stratum.
dftest <- rbind(
  dfistop100[setdiff(seq_len(n_top), sam1), ],
  dfnotop100[setdiff(seq_len(n_other), sam2), ]
)
library(gbm)
## Warning: package 'gbm' was built under R version 3.6.3
## Loaded gbm 2.1.8
# Fit a gradient-boosted Bernoulli model for istop100 on all training columns
# except the first one (id), with 3-fold cross-validation.
# Arguments are spelled out in full: the original `cv.fold` only worked via
# partial matching of `cv.folds`, and the data frame was passed positionally.
modelgbm <- gbm(
  istop100 ~ .,
  data = dftrain[, -1],
  distribution = "bernoulli",
  interaction.depth = 6,
  shrinkage = 0.01,
  n.trees = 500,
  n.cores = 6,
  cv.folds = 3
)
# Number of trees chosen by the cross-validation estimate.
bestntrees <- gbm.perf(modelgbm, method = "cv")
# Variable summary of the model evaluated at that number of trees.
summary(modelgbm, n.trees = bestntrees)
## var
## reviews_per_month reviews_per_month
## neighbourhood_cleansed neighbourhood_cleansed
## reviews reviews
## number_of_reviews_ltm number_of_reviews_ltm
## availability_365 availability_365
## longitude longitude
## selfie selfie
## city city
## review_scores_rating review_scores_rating
## availability_90 availability_90
## one one
## art art
## mins mins
## availability_30 availability_30
## bondi bondi
## street street
## people people
## price price
## identity identity
## the the
## best best
## bedroom bedroom
## apartment apartment
## availability_60 availability_60
## dishwasher dishwasher
## quiet quiet
## calculated_host_listings_count_private_rooms calculated_host_listings_count_private_rooms
## number_of_reviews_l30d number_of_reviews_l30d
## kitchen kitchen
## guest guest
## spacious spacious
## government government
## maximum_nights maximum_nights
## dryer dryer
## backyard backyard
## within within
## latitude latitude
## elevator elevator
## bus bus
## darling darling
## minimum_nights_avg_ntm minimum_nights_avg_ntm
## water water
## calculated_host_listings_count_entire_homes calculated_host_listings_count_entire_homes
## internet internet
## please please
## your your
## living living
## bathroom bathroom
## all all
## and and
## maximum_minimum_nights maximum_minimum_nights
## has has
## top top
## parking parking
## just just
## door door
## washing washing
## not not
## other other
## walk walk
## place place
## room room
## large large
## stop stop
## there there
## min min
## with with
## balcony balcony
## cafes cafes
## private private
## maker maker
## instant_bookable instant_bookable
## minimum_minimum_nights minimum_minimum_nights
## heart heart
## terrace terrace
## access access
## free free
## linens linens
## beds beds
## note note
## conditioning conditioning
## you you
## everything everything
## for. for.
## house house
## guests guests
## alarm alarm
## away away
## travel travel
## amazing amazing
## restaurants restaurants
## sydney sydney
## hot hot
## wifi wifi
## that that
## over over
## right right
## manual manual
## size size
## out out
## ground ground
## great great
## calculated_host_listings_count calculated_host_listings_count
## buses buses
## host_listings_count host_listings_count
## modern modern
## beach beach
## accommodates accommodates
## microwave microwave
## very very
## single single
## double double
## offer offer
## lockbox lockbox
## have have
## king king
## extra extra
## our our
## this this
## from from
## like like
## located located
## station station
## queen queen
## park park
## premises premises
## host_is_superhost host_is_superhost
## review_scores_cleanliness review_scores_cleanliness
## rooms rooms
## blankets blankets
## email email
## unlimited unlimited
## bay bay
## level level
## beaches beaches
## are are
## situated situated
## coffee coffee
## garden garden
## offline offline
## beautiful beautiful
## minimum_maximum_nights minimum_maximum_nights
## paid paid
## aid aid
## allowed allowed
## bathclass bathclass
## new new
## bbq bbq
## dropoff dropoff
## light light
## harbour harbour
## bathsum bathsum
## patio patio
## own own
## oven oven
## can can
## floor floor
## plus plus
## beds.1 beds.1
## train train
## crib crib
## bed bed
## pack pack
## enjoy enjoy
## peaceful peaceful
## extinguisher extinguisher
## bright bright
## refrigerator refrigerator
## review_scores_accuracy review_scores_accuracy
## which which
## across across
## separate separate
## chair chair
## tea tea
## front front
## available available
## air air
## bedrooms bedrooms
## off off
## sofa sofa
## manly manly
## minutes minutes
## space space
## work work
## stays stays
## cleaning cleaning
## lovely lovely
## things things
## shower shower
## many many
## minute minute
## stay stay
## ocean ocean
## location location
## home home
## views views
## studio studio
## easy easy
## equipped equipped
## laundry laundry
## host_has_profile_pic host_has_profile_pic
## host_identity_verified host_identity_verified
## room_type room_type
## host_total_listings_count host_total_listings_count
## minimum_nights minimum_nights
## maximum_maximum_nights maximum_maximum_nights
## maximum_nights_avg_ntm maximum_nights_avg_ntm
## review_scores_checkin review_scores_checkin
## review_scores_communication review_scores_communication
## review_scores_location review_scores_location
## review_scores_value review_scores_value
## calculated_host_listings_count_shared_rooms calculated_host_listings_count_shared_rooms
## after after
## airport airport
## airy airy
## also also
## amenities amenities
## any any
## appliances appliances
## area area
## areas areas
## around around
## australia australia
## back back
## bars bars
## basics basics
## bath bath
## bathrooms bathrooms
## bathtub bathtub
## bedrooms.1 bedrooms.1
## been been
## before before
## big big
## block block
## books books
## both both
## brand brand
## breakfast breakfast
## building building
## built built
## business business
## but but
## cable cable
## cafe cafe
## car car
## carbon carbon
## cbd cbd
## central central
## centre centre
## children children
## clean clean
## close close
## comfortable comfortable
## comfy comfy
## connection connection
## convenient convenient
## coogee coogee
## cooking cooking
## cosy cosy
## couple couple
## couples couples
## courtyard courtyard
## darkening darkening
## day day
## deck deck
## dining dining
## dishes dishes
## distance distance
## down down
## downstairs downstairs
## drive drive
## ensuite ensuite
## entire entire
## entrance entrance
## essentials essentials
## etc etc
## ethernet ethernet
## facebook facebook
## facilities facilities
## families families
## family family
## famous famous
## features features
## feel feel
## ferry ferry
## few few
## filled filled
## fire fire
## fireplace fireplace
## first first
## flat flat
## fridge fridge
## friendly friendly
## full full
## fully fully
## furnished furnished
## gas gas
## gel gel
## get get
## good good
## greets greets
## grill grill
## gym gym
## hair hair
## hangers hangers
## heating heating
## high high
## hills hills
## holiday holiday
## host host
## hotel hotel
## huge huge
## ideal ideal
## includes includes
## including including
## indoor indoor
## internal internal
## into into
## iron iron
## its its
## jumio jumio
## junction junction
## kit kit
## laptop laptop
## leafy leafy
## linen linen
## local local
## lock lock
## long long
## looking looking
## lots lots
## lounge lounge
## love love
## luggage luggage
## luxury luxury
## machine machine
## main main
## make make
## master master
## monoxide monoxide
## more more
## most most
## natural natural
## near near
## need need
## netflix netflix
## newly newly
## next. next.
## nice nice
## night night
## north north
## offers offers
## only only
## open open
## outdoor outdoor
## outside outside
## parks parks
## perfect perfect
## phone phone
## pillows pillows
## plan plan
## play play
## plenty plenty
## pool pool
## property property
## provided provided
## public public
## quality quality
## relax relax
## relaxing relaxing
## renovated renovated
## road road
## second second
## secure secure
## self self
## set set
## shades shades
## shampoo shampoo
## share share
## shared shared
## shopping shopping
## shops shops
## short short
## silverware silverware
## sized sized
## small small
## smoke smoke
## some some
## south south
## stove stove
## stroll stroll
## stunning stunning
## style style
## stylish stylish
## suite suite
## summer summer
## sun sun
## sunny sunny
## surry surry
## swimming swimming
## table table
## take take
## term term
## than than
## three three
## throughout throughout
## time time
## towels towels
## toys toys
## transport transport
## tub tub
## two two
## u2019n u2019n
## u2019s u2019s
## unit unit
## use use
## utensils utensils
## view view
## village village
## wales wales
## walking walking
## want want
## wardrobe wardrobe
## washer washer
## welcome welcome
## well well
## where where
## while. while.
## who who
## whole whole
## will will
## workspace workspace
## rel.inf
## reviews_per_month 24.613502968
## neighbourhood_cleansed 12.265933934
## reviews 8.560103762
## number_of_reviews_ltm 5.357568389
## availability_365 4.253743060
## longitude 1.623172228
## selfie 1.508859783
## city 1.460222682
## review_scores_rating 1.247490343
## availability_90 1.236219765
## one 1.118431715
## art 1.033383759
## mins 0.971954496
## availability_30 0.907957939
## bondi 0.900262822
## street 0.877715179
## people 0.812121710
## price 0.700881391
## identity 0.672910578
## the 0.662803331
## best 0.621309429
## bedroom 0.602930074
## apartment 0.593157924
## availability_60 0.572817872
## dishwasher 0.566341057
## quiet 0.551939112
## calculated_host_listings_count_private_rooms 0.547608596
## number_of_reviews_l30d 0.546756916
## kitchen 0.546745036
## guest 0.536979226
## spacious 0.506771380
## government 0.503765608
## maximum_nights 0.497954753
## dryer 0.461363929
## backyard 0.445761349
## within 0.433153558
## latitude 0.426932380
## elevator 0.421488436
## bus 0.391377286
## darling 0.389180233
## minimum_nights_avg_ntm 0.374192172
## water 0.368655714
## calculated_host_listings_count_entire_homes 0.361576176
## internet 0.344472535
## please 0.338026824
## your 0.310810038
## living 0.299216898
## bathroom 0.271929309
## all 0.270051193
## and 0.269989723
## maximum_minimum_nights 0.266813965
## has 0.266114731
## top 0.265078309
## parking 0.264764578
## just 0.262840214
## door 0.258559637
## washing 0.251099291
## not 0.245278860
## other 0.245207905
## walk 0.239775220
## place 0.233867292
## room 0.229530763
## large 0.228765327
## stop 0.226518880
## there 0.220734646
## min 0.219608515
## with 0.218409940
## balcony 0.215668084
## cafes 0.213564642
## private 0.211528121
## maker 0.201868803
## instant_bookable 0.201801971
## minimum_minimum_nights 0.200552801
## heart 0.200283598
## terrace 0.196308835
## access 0.195995696
## free 0.192851668
## linens 0.190352385
## beds 0.189719041
## note 0.184010260
## conditioning 0.174968778
## you 0.174176862
## everything 0.174008660
## for. 0.159275082
## house 0.158654590
## guests 0.157091248
## alarm 0.156422459
## away 0.155452658
## travel 0.153770328
## amazing 0.151041946
## restaurants 0.149154980
## sydney 0.146412707
## hot 0.145216010
## wifi 0.143995686
## that 0.141876710
## over 0.141219781
## right 0.137663449
## manual 0.134973884
## size 0.134429080
## out 0.128180426
## ground 0.126768625
## great 0.125192742
## calculated_host_listings_count 0.123953405
## buses 0.123743888
## host_listings_count 0.122646182
## modern 0.119245777
## beach 0.119153122
## accommodates 0.117324139
## microwave 0.114674698
## very 0.113721757
## single 0.111492474
## double 0.111104354
## offer 0.107096570
## lockbox 0.106853067
## have 0.106157907
## king 0.105649837
## extra 0.105601234
## our 0.105538462
## this 0.104100850
## from 0.103952701
## like 0.103673118
## located 0.100630456
## station 0.094136552
## queen 0.093478042
## park 0.092687337
## premises 0.091224849
## host_is_superhost 0.086906712
## review_scores_cleanliness 0.086487433
## rooms 0.086426433
## blankets 0.084698169
## email 0.084143306
## unlimited 0.084085325
## bay 0.084083250
## level 0.082705929
## beaches 0.081180534
## are 0.081006391
## situated 0.077536782
## coffee 0.077508954
## garden 0.074978400
## offline 0.074687471
## beautiful 0.073126592
## minimum_maximum_nights 0.071464930
## paid 0.071000392
## aid 0.068752815
## allowed 0.068396547
## bathclass 0.068148023
## new 0.066885436
## bbq 0.065877101
## dropoff 0.063190317
## light 0.060163432
## harbour 0.058962693
## bathsum 0.057892041
## patio 0.054864501
## own 0.054280857
## oven 0.050163737
## can 0.050115263
## floor 0.048464846
## plus 0.045951253
## beds.1 0.044193785
## train 0.043100054
## crib 0.041087411
## bed 0.039789714
## pack 0.039680673
## enjoy 0.038381695
## peaceful 0.036718314
## extinguisher 0.036646199
## bright 0.036001845
## refrigerator 0.035837685
## review_scores_accuracy 0.035588748
## which 0.035470624
## across 0.034347017
## separate 0.031553276
## chair 0.031170407
## tea 0.029782335
## front 0.028914750
## available 0.028555134
## air 0.026182828
## bedrooms 0.025613137
## off 0.024917679
## sofa 0.024152449
## manly 0.022493610
## minutes 0.019576328
## space 0.019507136
## work 0.018899655
## stays 0.018608734
## cleaning 0.018499808
## lovely 0.018259539
## things 0.018227699
## shower 0.014635440
## many 0.011675872
## minute 0.011571822
## stay 0.011254694
## ocean 0.009773319
## location 0.006427817
## home 0.005306123
## views 0.004747560
## studio 0.004670207
## easy 0.003812323
## equipped 0.003579804
## laundry 0.002894869
## host_has_profile_pic 0.000000000
## host_identity_verified 0.000000000
## room_type 0.000000000
## host_total_listings_count 0.000000000
## minimum_nights 0.000000000
## maximum_maximum_nights 0.000000000
## maximum_nights_avg_ntm 0.000000000
## review_scores_checkin 0.000000000
## review_scores_communication 0.000000000
## review_scores_location 0.000000000
## review_scores_value 0.000000000
## calculated_host_listings_count_shared_rooms 0.000000000
## after 0.000000000
## airport 0.000000000
## airy 0.000000000
## also 0.000000000
## amenities 0.000000000
## any 0.000000000
## appliances 0.000000000
## area 0.000000000
## areas 0.000000000
## around 0.000000000
## australia 0.000000000
## back 0.000000000
## bars 0.000000000
## basics 0.000000000
## bath 0.000000000
## bathrooms 0.000000000
## bathtub 0.000000000
## bedrooms.1 0.000000000
## been 0.000000000
## before 0.000000000
## big 0.000000000
## block 0.000000000
## books 0.000000000
## both 0.000000000
## brand 0.000000000
## breakfast 0.000000000
## building 0.000000000
## built 0.000000000
## business 0.000000000
## but 0.000000000
## cable 0.000000000
## cafe 0.000000000
## car 0.000000000
## carbon 0.000000000
## cbd 0.000000000
## central 0.000000000
## centre 0.000000000
## children 0.000000000
## clean 0.000000000
## close 0.000000000
## comfortable 0.000000000
## comfy 0.000000000
## connection 0.000000000
## convenient 0.000000000
## coogee 0.000000000
## cooking 0.000000000
## cosy 0.000000000
## couple 0.000000000
## couples 0.000000000
## courtyard 0.000000000
## darkening 0.000000000
## day 0.000000000
## deck 0.000000000
## dining 0.000000000
## dishes 0.000000000
## distance 0.000000000
## down 0.000000000
## downstairs 0.000000000
## drive 0.000000000
## ensuite 0.000000000
## entire 0.000000000
## entrance 0.000000000
## essentials 0.000000000
## etc 0.000000000
## ethernet 0.000000000
## facebook 0.000000000
## facilities 0.000000000
## families 0.000000000
## family 0.000000000
## famous 0.000000000
## features 0.000000000
## feel 0.000000000
## ferry 0.000000000
## few 0.000000000
## filled 0.000000000
## fire 0.000000000
## fireplace 0.000000000
## first 0.000000000
## flat 0.000000000
## fridge 0.000000000
## friendly 0.000000000
## full 0.000000000
## fully 0.000000000
## furnished 0.000000000
## gas 0.000000000
## gel 0.000000000
## get 0.000000000
## good 0.000000000
## greets 0.000000000
## grill 0.000000000
## gym 0.000000000
## hair 0.000000000
## hangers 0.000000000
## heating 0.000000000
## high 0.000000000
## hills 0.000000000
## holiday 0.000000000
## host 0.000000000
## hotel 0.000000000
## huge 0.000000000
## ideal 0.000000000
## includes 0.000000000
## including 0.000000000
## indoor 0.000000000
## internal 0.000000000
## into 0.000000000
## iron 0.000000000
## its 0.000000000
## jumio 0.000000000
## junction 0.000000000
## kit 0.000000000
## laptop 0.000000000
## leafy 0.000000000
## linen 0.000000000
## local 0.000000000
## lock 0.000000000
## long 0.000000000
## looking 0.000000000
## lots 0.000000000
## lounge 0.000000000
## love 0.000000000
## luggage 0.000000000
## luxury 0.000000000
## machine 0.000000000
## main 0.000000000
## make 0.000000000
## master 0.000000000
## monoxide 0.000000000
## more 0.000000000
## most 0.000000000
## natural 0.000000000
## near 0.000000000
## need 0.000000000
## netflix 0.000000000
## newly 0.000000000
## next. 0.000000000
## nice 0.000000000
## night 0.000000000
## north 0.000000000
## offers 0.000000000
## only 0.000000000
## open 0.000000000
## outdoor 0.000000000
## outside 0.000000000
## parks 0.000000000
## perfect 0.000000000
## phone 0.000000000
## pillows 0.000000000
## plan 0.000000000
## play 0.000000000
## plenty 0.000000000
## pool 0.000000000
## property 0.000000000
## provided 0.000000000
## public 0.000000000
## quality 0.000000000
## relax 0.000000000
## relaxing 0.000000000
## renovated 0.000000000
## road 0.000000000
## second 0.000000000
## secure 0.000000000
## self 0.000000000
## set 0.000000000
## shades 0.000000000
## shampoo 0.000000000
## share 0.000000000
## shared 0.000000000
## shopping 0.000000000
## shops 0.000000000
## short 0.000000000
## silverware 0.000000000
## sized 0.000000000
## small 0.000000000
## smoke 0.000000000
## some 0.000000000
## south 0.000000000
## stove 0.000000000
## stroll 0.000000000
## stunning 0.000000000
## style 0.000000000
## stylish 0.000000000
## suite 0.000000000
## summer 0.000000000
## sun 0.000000000
## sunny 0.000000000
## surry 0.000000000
## swimming 0.000000000
## table 0.000000000
## take 0.000000000
## term 0.000000000
## than 0.000000000
## three 0.000000000
## throughout 0.000000000
## time 0.000000000
## towels 0.000000000
## toys 0.000000000
## transport 0.000000000
## tub 0.000000000
## two 0.000000000
## u2019n 0.000000000
## u2019s 0.000000000
## unit 0.000000000
## use 0.000000000
## utensils 0.000000000
## view 0.000000000
## village 0.000000000
## wales 0.000000000
## walking 0.000000000
## want 0.000000000
## wardrobe 0.000000000
## washer 0.000000000
## welcome 0.000000000
## well 0.000000000
## where 0.000000000
## while. 0.000000000
## who 0.000000000
## whole 0.000000000
## will 0.000000000
## workspace 0.000000000
# Relative-influence table of the boosted model at the selected tree count
# (one row per predictor, columns var and rel.inf, as printed below).
dfht <- summary(modelgbm, n.trees = bestntrees)
# Variable-importance chart: horizontal bars for the first 30 rows of dfht,
# ordered by relative influence.
ggplot(data = dfht[seq_len(30), ],
       mapping = aes(x = reorder(var, rel.inf), y = rel.inf)) +
  geom_bar(stat = "identity", fill = "blue") +
  coord_flip()
# First 40 rows of the influence table
head(dfht, n = 40)
## var
## reviews_per_month reviews_per_month
## neighbourhood_cleansed neighbourhood_cleansed
## reviews reviews
## number_of_reviews_ltm number_of_reviews_ltm
## availability_365 availability_365
## longitude longitude
## selfie selfie
## city city
## review_scores_rating review_scores_rating
## availability_90 availability_90
## one one
## art art
## mins mins
## availability_30 availability_30
## bondi bondi
## street street
## people people
## price price
## identity identity
## the the
## best best
## bedroom bedroom
## apartment apartment
## availability_60 availability_60
## dishwasher dishwasher
## quiet quiet
## calculated_host_listings_count_private_rooms calculated_host_listings_count_private_rooms
## number_of_reviews_l30d number_of_reviews_l30d
## kitchen kitchen
## guest guest
## spacious spacious
## government government
## maximum_nights maximum_nights
## dryer dryer
## backyard backyard
## within within
## latitude latitude
## elevator elevator
## bus bus
## darling darling
## rel.inf
## reviews_per_month 24.6135030
## neighbourhood_cleansed 12.2659339
## reviews 8.5601038
## number_of_reviews_ltm 5.3575684
## availability_365 4.2537431
## longitude 1.6231722
## selfie 1.5088598
## city 1.4602227
## review_scores_rating 1.2474903
## availability_90 1.2362198
## one 1.1184317
## art 1.0333838
## mins 0.9719545
## availability_30 0.9079579
## bondi 0.9002628
## street 0.8777152
## people 0.8121217
## price 0.7008814
## identity 0.6729106
## the 0.6628033
## best 0.6213094
## bedroom 0.6029301
## apartment 0.5931579
## availability_60 0.5728179
## dishwasher 0.5663411
## quiet 0.5519391
## calculated_host_listings_count_private_rooms 0.5476086
## number_of_reviews_l30d 0.5467569
## kitchen 0.5467450
## guest 0.5369792
## spacious 0.5067714
## government 0.5037656
## maximum_nights 0.4979548
## dryer 0.4613639
## backyard 0.4457613
## within 0.4331536
## latitude 0.4269324
## elevator 0.4214884
## bus 0.3913773
## darling 0.3891802
# Score every row of dftest with the boosted model at the selected tree count.
# No `type` is passed, so the scores are on predict()'s default scale for this
# model.
# NOTE(review): presumably that is the link scale rather than probabilities —
# the negative "best" threshold reported further down is consistent with
# that; confirm.
pretest <- predict(modelgbm, newdata = dftest, n.trees = bestntrees)
library(pROC)
## Warning: package 'pROC' was built under R version 3.6.3
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
# ROC curve of the dftest scores against the istop100 labels; the curve is
# drawn and the AUC printed on it.
# The class levels and the comparison direction are stated explicitly instead
# of being auto-detected. They are the values pROC selected in the recorded
# run (control = 0, case = 1, controls < cases), so the result is unchanged,
# but the curve can no longer silently flip orientation on different data.
roc1 <- roc(dftest$istop100, pretest,
            levels = c(0, 1), direction = "<",
            print.auc = TRUE, plot = TRUE)
# Cut-off selected by pROC's "best" criterion (Youden's J unless best.method
# says otherwise).
# transpose = FALSE is passed explicitly so the result is a data frame with
# one row per selected threshold and the pROC transpose warning is not
# raised. The threshold is read by column name rather than by position; row 1
# is used in case several thresholds tie for "best".
# NOTE(review): the cut-off is tuned on dftest, the same data the confusion
# matrix is later computed on, so the reported accuracy is optimistic.
bestdd <- coords(roc1, "best", transpose = FALSE)[1, "threshold"]
# Confusion matrix on dftest: rows = actual istop100, columns = predicted
# class (1 when the score exceeds bestdd, otherwise 0).
# Both sides are converted to factors with fixed levels 0/1 so the table is
# always 2 x 2. With a bare table(), a class that is never predicted (or never
# observed) would lose its column (or row), and diag(ma) would no longer
# correspond to the correctly classified cases.
ma <- table(factor(dftest$istop100, levels = c(0, 1)),
            factor(ifelse(pretest > bestdd, 1, 0), levels = c(0, 1)))
ma
##
## 0 1
## 0 3778 270
## 1 1 39
# Overall accuracy: correctly classified cases (diagonal of ma) over all cases.
# With classes as unbalanced as in the recorded table (4048 vs 40 rows), this
# figure is driven almost entirely by the majority class.
sum(diag(ma)) / sum(ma)
## [1] 0.9337084
# Print the selected operating point (threshold, specificity, sensitivity).
# transpose = FALSE is stated explicitly: it keeps the data-frame layout
# shown in the output below and avoids the pROC transpose warning.
coords(roc1, "best", transpose = FALSE)
## threshold specificity sensitivity
## 1 -6.226547 0.9333004 0.975